{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b4e2a99c-b639-4711-b47b-8b4fe99ae424",
   "metadata": {},
   "source": [
    "# Sparse Dense and Hybrid Search"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "85461c2b-6e2e-4b26-8b28-60f1a03bb7c9",
   "metadata": {},
   "source": [
    "## Remove old Weaviate DB files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f42e56c-b02b-4e58-9c8e-5f6da45d1b0e",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "!rm -rf ~/.local/share/weaviate"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a543c34-1f7b-4e35-b235-ba9a871f690c",
   "metadata": {},
   "source": [
    "## Recreate the example\n",
    "With the same data as in the previous lesson"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "262599c7",
   "metadata": {
    "height": 217
   },
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "# Download the data\n",
    "resp = requests.get('https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json')\n",
    "data = json.loads(resp.text)  # Load data\n",
    "\n",
    "# Parse the JSON and preview it\n",
    "print(type(data), len(data))\n",
    "\n",
    "def json_print(data):\n",
    "    print(json.dumps(data, indent=2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "04887b2b-3dfd-4efd-b224-f7fde76c8b40",
   "metadata": {
    "height": 285
   },
   "outputs": [],
   "source": [
    "import weaviate, os\n",
    "from weaviate import EmbeddedOptions\n",
    "import openai\n",
    "\n",
    "from dotenv import load_dotenv, find_dotenv\n",
    "_ = load_dotenv(find_dotenv()) # read local .env file\n",
    "openai.api_key = os.environ['OPENAI_API_KEY']\n",
    "\n",
    "client = weaviate.Client(\n",
    "    embedded_options=EmbeddedOptions(),\n",
    "    additional_headers={\n",
    "        \"X-OpenAI-Api-BaseURL\": os.environ['OPENAI_API_BASE'],\n",
    "        \"X-OpenAI-Api-Key\": openai.api_key,  # Replace this with your actual key\n",
    "    }\n",
    ")\n",
    "print(f\"Client created? {client.is_ready()}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11eedab7-bf2a-4d04-a1da-97ce901e3ce8",
   "metadata": {
    "height": 319
   },
   "outputs": [],
   "source": [
    "# Uncomment the following two lines if you want to run this block for a second time.\n",
    "if client.schema.exists(\"Question\"):\n",
    "   client.schema.delete_class(\"Question\")\n",
    " \n",
    "class_obj = {\n",
    "    \"class\": \"Question\",\n",
    "    \"vectorizer\": \"text2vec-openai\",  # Use OpenAI as the vectorizer\n",
    "    \"moduleConfig\": {\n",
    "        \"text2vec-openai\": {\n",
    "            \"model\": \"ada\",\n",
    "            \"modelVersion\": \"002\",\n",
    "            \"type\": \"text\",\n",
    "            \"baseURL\": os.environ[\"OPENAI_API_BASE\"]\n",
    "        }\n",
    "    }\n",
    "}\n",
    "\n",
    "client.schema.create_class(class_obj)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c01d814-74d8-43cd-a8e7-7e25a905f86a",
   "metadata": {
    "height": 268
   },
   "outputs": [],
   "source": [
    "with client.batch.configure(batch_size=5) as batch:\n",
    "    for i, d in enumerate(data):  # Batch import data\n",
    "        \n",
    "        print(f\"importing question: {i+1}\")\n",
    "        \n",
    "        properties = {\n",
    "            \"answer\": d[\"Answer\"],\n",
    "            \"question\": d[\"Question\"],\n",
    "            \"category\": d[\"Category\"],\n",
    "        }\n",
    "        \n",
    "        batch.add_data_object(\n",
    "            data_object=properties,\n",
    "            class_name=\"Question\"\n",
    "        )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b833428c-83f9-4bdb-938a-c912ae62a060",
   "metadata": {},
   "source": [
    "## Queries"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9e2f3380-3950-4256-b210-ed526b928ec6",
   "metadata": {},
   "source": [
    "### Dense Search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e510be89-a1f3-40bf-9320-ba1c215a99dd",
   "metadata": {
    "height": 166
   },
   "outputs": [],
   "source": [
    "response = (\n",
    "    client.query\n",
    "    .get(\"Question\", [\"question\", \"answer\"])\n",
    "    .with_near_text({\"concepts\":[\"animal\"]})\n",
    "    .with_limit(3)\n",
    "    .do()\n",
    ")\n",
    "\n",
    "json_print(response)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d50f60c7-0ed2-43a6-a2ec-05fc126200e7",
   "metadata": {},
   "source": [
    "### Sparse Search - BM25"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b5d3067-dbfb-4006-82af-168548e7e49f",
   "metadata": {
    "height": 166
   },
   "outputs": [],
   "source": [
    "response = (\n",
    "    client.query\n",
    "    .get(\"Question\",[\"question\",\"answer\"])\n",
    "    .with_bm25(query=\"animal\")\n",
    "    .with_limit(3)\n",
    "    .do()\n",
    ")\n",
    "\n",
    "json_print(response)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "73ef743d-8b12-44ea-9322-8e7e9d3a0cd5",
   "metadata": {},
   "source": [
    "### Hybrid Search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "baecafb4-95b3-4b1c-8d66-49aa2719a622",
   "metadata": {
    "height": 166
   },
   "outputs": [],
   "source": [
    "response = (\n",
    "    client.query\n",
    "    .get(\"Question\",[\"question\",\"answer\"])\n",
    "    .with_hybrid(query=\"animal\", alpha=0.5)\n",
    "    .with_limit(3)\n",
    "    .do()\n",
    ")\n",
    "\n",
    "json_print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cfaf68ae-dd85-4b04-8d81-1d9bdaf13fa8",
   "metadata": {
    "height": 166
   },
   "outputs": [],
   "source": [
    "response = (\n",
    "    client.query\n",
    "    .get(\"Question\",[\"question\",\"answer\"])\n",
    "    .with_hybrid(query=\"animal\", alpha=0)\n",
    "    .with_limit(3)\n",
    "    .do()\n",
    ")\n",
    "\n",
    "json_print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27698f80-8349-4016-a369-074a03009df7",
   "metadata": {
    "height": 166
   },
   "outputs": [],
   "source": [
    "response = (\n",
    "    client.query\n",
    "    .get(\"Question\",[\"question\",\"answer\"])\n",
    "    .with_hybrid(query=\"animal\", alpha=1)\n",
    "    .with_limit(3)\n",
    "    .do()\n",
    ")\n",
    "\n",
    "json_print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37608d01-21d5-4c9b-b262-6c8d5889e6b0",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d59fb9f7-323c-4a65-b274-f124ad484563",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1d932b2-d975-4b16-85c8-628d3cbd10da",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "502f0b76-0d43-4f5b-b187-f5b2bb62e0cc",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d35b9aa-6695-4489-8a35-8ac5c1b580c6",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4ad1c11-ad41-42b2-b84d-bc1a8869747c",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c56b70e3-83d9-4ed9-bbcc-60887c57f1eb",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02cf1ee8-cece-47b7-b85b-b20bcea2700f",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb67b098-ebee-43b3-96f8-b30337e2dc8e",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "73325684-7027-4972-8660-3c326ba7571f",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "490ef7cc-141d-4f7a-a701-9c620e3d0c0f",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5fee22aa-2469-42c5-8836-cb6adda72e96",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4cc9f529-167f-4211-9f04-410ce0618c8a",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8907cba8-7ab5-4692-ae9e-cfd83835f3e2",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b200fc5-5698-457c-bbd7-5986ac29c2f9",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "940046bd-bc14-4098-b9d4-bc55bc31c3f3",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db661ce6-45c2-4d16-9174-5b7c56c2d83d",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea7110e9-5b27-448a-a883-9c2b6e8a2685",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94bcd30f-9b72-4c39-9347-7c3fa5415613",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1de6f9b-aae2-4165-b1a0-4c2864654396",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c9c03c58-b700-4685-86ac-163c78ca5954",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4a880a5-c40a-48a4-9422-8c73761a34ea",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed5516e7-d20a-4f0a-b4e6-1b90fc408491",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e98d1c8-d080-47d6-93e1-1ab12c4e2798",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f81277d-7c61-4be0-95d2-14560f9bf6a9",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cefb463e-90da-4446-8319-95b2ee1a341c",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8cc4aff0-312b-45cf-976b-60d3b515d79b",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f18389ee-dab3-421a-b75b-7700f460e1ab",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a3906b1-e81b-4290-bd12-1dff9149114a",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
